Step1: Compute AUROC and ACC Row Sums and Median values under different weights

We identify the top methods for network structure prediction by computing the area under the receiver operating characteristic (AUROC) and accuracy (ACC) row sum and median values for each of the 28 networks. We weight these variables at 5 different ranges:

for (sim in (1:28)){
  AUC<-read.csv(paste0("sim",sim,"/Combined_Data_onlyAUC.csv"),row.names = 1)
  ACC<-read.csv(paste0("sim",sim,"/Combined_Data_onlyACC.csv"),row.names = 1)

  #calculate AUC only sums and medians
  AUC_sum<-cbind(AUC$Method_name,rowSums(AUC[,-1])) %>% as.data.frame()
  colnames(AUC_sum)<-c("Method_name","Sum_of_avg_across_patients")
  AUC_sum$Sum_of_avg_across_patients<-as.numeric(AUC_sum$Sum_of_avg_across_patients)

  AUC_median<-cbind(AUC$Method_name,apply(AUC[-1], 1, median)) %>% as.data.frame()
  colnames(AUC_median)<-c("Method_name","Median_across_patients")

  write.csv(AUC_sum,paste0('sim',sim,'/Combined_data_sums_onlyAUC.csv'),quote=F)
  write.csv(AUC_median,paste0('sim',sim,'/Combined_data_median_onlyAUC.csv'),quote=F)

  #calculate ACC only sums and medians
  ACC_sum<-cbind(ACC$Method_name,rowSums(ACC[,-1])) %>% as.data.frame()
  colnames(ACC_sum)<-c("Method_name","Sum_of_avg_across_patients")
  ACC_sum$Sum_of_avg_across_patients<-as.numeric(ACC_sum$Sum_of_avg_across_patients)


  ACC_median<-cbind(ACC$Method_name,apply(ACC[-1], 1, median)) %>% as.data.frame()
  colnames(ACC_median)<-c("Method_name","Median_across_patients")

  write.csv(ACC_sum,paste0('sim',sim,'/Combined_data_sums_onlyACC.csv'),quote=F)
  write.csv(ACC_median,paste0('sim',sim,'/Combined_data_median_onlyACC.csv'),quote=F)

  #  calculate equal weights sums and medians
  AUC_half<-cbind(sapply(AUC[,-1],"*",0.5))
  ACC_half<-cbind(sapply(ACC[,-1],"*",0.5))
  equal<-cbind( AUC_half + ACC_half)


  equal_sum<-cbind(AUC$Method_name,rowSums(equal)) %>% as.data.frame()
  colnames(equal_sum)<-c("Method_name","Sum_of_avg_across_patients")
  equal_sum$Sum_of_avg_across_patients<-as.numeric(equal_sum$Sum_of_avg_across_patients)

  equal_median<-cbind(AUC$Method_name,apply(equal, 1, median)) %>% as.data.frame()
  colnames(equal_median)<-c("Method_name","Median_across_patients")


  write.csv(equal,paste0('sim',sim,'/Combined_data_equal.csv'),quote=F)
  write.csv(equal_sum,paste0('sim',sim,'/Combined_data_sums_equal.csv'),quote=F)
  write.csv(equal_median,paste0('sim',sim,'/Combined_data_median_equal.csv'),quote=F)


  #calculate 0.3 AUROC and 0.7 ACC weights sums and medians
  AUC_low<-cbind(sapply(AUC[,-1],"*",0.3))
  ACC_high<-cbind(sapply(ACC[,-1],"*",0.7))
  three_seven<-cbind( AUC_low + ACC_high)


  three_seven_sum<-cbind(AUC$Method_name,rowSums(three_seven)) %>% as.data.frame()
  colnames(three_seven_sum)<-c("Method_name","Sum_of_avg_across_patients")
  three_seven_sum$Sum_of_avg_across_patients<-as.numeric(three_seven_sum$Sum_of_avg_across_patients)

  three_seven_median<-cbind(AUC$Method_name,apply(three_seven, 1, median)) %>% as.data.frame()
  colnames(three_seven_median)<-c("Method_name","Median_across_patients")

  write.csv(three_seven,paste0('sim',sim,'/Combined_data_37.csv'),quote=F)
  write.csv(three_seven_sum,paste0('sim',sim,'/Combined_data_sums_37.csv'),quote=F)
  write.csv(three_seven_median,paste0('sim',sim,'/Combined_data_median_37.csv'),quote=F)

  #calculate 0.7 AUROC and 0.3 ACC weights sums and medianssums and medians}
  AUC_high<-cbind(sapply(AUC[,-1],"*",0.7))
  ACC_low<-cbind(sapply(ACC[,-1],"*",0.3))
  seven_three<-cbind( AUC_low + ACC_high)

  seven_three_sum<-cbind(AUC$Method_name,rowSums(seven_three)) %>% as.data.frame()
  colnames(seven_three_sum)<-c("Method_name","Sum_of_avg_across_patients")
  seven_three_sum$Sum_of_avg_across_patients<-as.numeric(seven_three_sum$Sum_of_avg_across_patients)


  seven_three_median<-cbind(AUC$Method_name,apply(seven_three, 1, median)) %>% as.data.frame()
  colnames(seven_three_median)<-c("Method_name","Median_across_patients")

  write.csv(seven_three,paste0('sim',sim,'/Combined_data_73.csv'),quote=F)
  write.csv(seven_three_sum,paste0('sim',sim,'/Combined_data_sums_73.csv'),quote=F)
  write.csv(seven_three_median,paste0('sim',sim,'/Combined_data_median_73.csv'),quote=F)
}

Step2: Compute Average Row Sums and Median values under different weights

Next, we compute the average Row Sums and Median values under the 5 AUROC and ACC weights.

patterns<-c("Combined_data_sums_onlyAUC","Combined_data_sums_onlyACC","Combined_data_sums_equal","Combined_data_sums_37","Combined_data_sums_73")

dir.create("averages")

for (pattern in patterns){
  count_files <- list.files(recursive=TRUE,pattern=paste0("^",pattern,".csv"),full.names=TRUE)
  d1<-data.frame()
  for (i in count_files) {
    # read file i as a data frame
    f <- read.csv(i,row.names = 1)
    if(length(d1) == 0){
      d1 <- f
    }else
    {
     d1$Sum_of_avg_across_patients<-d1$Sum_of_avg_across_patients+f$Sum_of_avg_across_patients
    }
  }
    d1$Sum_of_avg_across_patients<-d1$Sum_of_avg_across_patients/28
    d1$Sum_of_avg_across_patients<-d1$Sum_of_avg_across_patients/50
    d1<- d1 %>% arrange(desc(Sum_of_avg_across_patients))
    write.csv(d1,paste0("averages/Average_",pattern,".csv"),quote=F)
}
patterns<-c("Combined_data_median_onlyAUC","Combined_data_median_onlyACC","Combined_data_median_equal","Combined_data_median_37","Combined_data_median_73")

for (pattern in patterns){
  count_files <- list.files(recursive=TRUE,pattern=paste0("^",pattern,".csv"),full.names=TRUE)
  count_files<-count_files[!grepl("all", count_files)]
  d1<-data.frame()
  for (i in count_files) {
    # read file i as a data frame
    f <- read.csv(i,row.names = 1)
    if(length(d1) == 0){
      d1 <- f
    }else
    {
     d1$Median_across_patients<-d1$Median_across_patients+f$Median_across_patients
    }
  }
    d1$Median_across_patients<-d1$Median_across_patients/28
    d1<- d1 %>% arrange(desc(Median_across_patients))
    write.csv(d1,paste0("averages/Average_Median_",pattern,".csv"),quote=F)
}
rm(list = ls())

Step3: Generate heatmaps of Average Row Sums and Median values under different weights

To identify the top performing network methods across all 28 networks and under different weights of the AUROC and ACC, we generate a heatmap with hierarchical clustering

pal<-rev(c("#FF0000","#0b5394","#38761d","#ffe700","#FFFFFF"))
count_files <- list.files(path="averages",recursive=TRUE,pattern=paste0("Average_Combined"),full.names=TRUE)

d1<-data.frame()
for (i in count_files){
  f<-read.csv(i,row.names = 1)

  if(length(d1) == 0){
    d1<-f
  }
  else{
    d1<- merge(d1, f, by.x = "Method_name", by.y = "Method_name")
  }

  }


rownames(d1)<-d1$Method_name
colnames(d1)<-c("Method_name","AUC=0.3,ACC=0.7","AUC=0.7,ACC=0.3","AUC=0.5,ACC=0.5","AUC=0,ACC=1","AUC=1,ACC=0")
d1<- d1 %>% select(-Method_name)


rowsums_top10<-rownames(d1 %>% arrange(desc(`AUC=0.5,ACC=0.5`)))[1:10]
rowsums_top4<-rownames(d1 %>% arrange(desc(`AUC=0.5,ACC=0.5`)))[1:4]

heatmaply(d1,colors = pal,show_dendrogram = c(TRUE, FALSE),labCol=c("Weight<sub>AUC=0.3</sub>,Weight<sub>ACC=0.7</sub>","Weight<sub>AUC=0.7</sub>,Weight<sub>ACC=0.3</sub>","Weight<sub>AUC=0.5</sub>,Weight<sub>ACC=0.5</sub>","Weight<sub>AUC=0</sub>,Weight<sub>ACC=1</sub>","Weight<sub>AUC=1</sub>,Weight<sub>ACC=0</sub>"),main="Average Row Sums Value across 28 Networks",file = "heatmap_rowsums.pdf",width=800,height=1000,margins = c(40, 50)) %>%
layout(height = 900)





















patterns<-c("Combined_data_median_onlyAUC","Combined_data_median_onlyACC","Combined_data_median_equal","Combined_data_median_37","Combined_data_median_73")

count_files <- list.files(path="averages",recursive=TRUE,pattern=paste0("Average_Median"),full.names=TRUE)

d1<-data.frame()
for (i in count_files){
  f<-read.csv(i,row.names = 1)

  if(length(d1) == 0){
    d1<-f
  }
  else{
    d1<- merge(d1, f, by.x = "Method_name", by.y = "Method_name")
  }

  }


rownames(d1)<-d1$Method_name
colnames(d1)<-c("Method_name","AUC=0.3,ACC=0.7","AUC=0.7,ACC=0.3","AUC=0.5,ACC=0.5","AUC=0,ACC=1","AUC=1,ACC=0")
d1<- d1 %>% select(-Method_name)

medians_top10<-rownames(d1 %>% arrange(desc(`AUC=0.5,ACC=0.5`)))[1:10]
medians_top4<-rownames(d1 %>% arrange(desc(`AUC=0.5,ACC=0.5`)))[1:4]

heatmaply(d1,colors = pal,show_dendrogram = c(TRUE, FALSE),labCol=c("Weight<sub>AUC=0.3</sub>,Weight<sub>ACC=0.7</sub>","Weight<sub>AUC=0.7</sub>,Weight<sub>ACC=0.3</sub>","Weight<sub>AUC=0.5</sub>,Weight<sub>ACC=0.5</sub>","Weight<sub>AUC=0</sub>,Weight<sub>ACC=1</sub>","Weight<sub>AUC=1</sub>,Weight<sub>ACC=0</sub>"),main="Average Median Value across 28 Networks",file = "heatmap_medians.pdf",width=800,height=1000,margins = c(40, 50))  %>%
  layout(height = 900)





















These heatmaps show that the top methods across all networks with all weights of AUROC and ACC are BCohF, BCorrU, PCohF, and Tigress

Step4: Generate line plots of Average Row Sums and Median AUROC and ACC values under different weights

We can plot how the individual top methods perform in all 28 networks individually using line plots under different weights of AUROC and ACC

create_rowsum_lineplot<-function(pattern){
  count_files <- list.files(recursive=T,pattern=paste0("^",pattern),full.names=TRUE)
  d1<-data.frame()
  for (i in count_files) {
    f <- read.csv(i,row.names = 1) %>% filter(Method_name %in% rowsums_top4)
    f<- cbind(f,c(rep(unlist(strsplit(i,'/'))[2],nrow(f))))
    colnames(f)[3]<-c('time_series')

    #if the counts object is empty just copy the f to m
    if(length(d1) == 0){
      d1 <- f

    } else
    {
      #if the dataframe is not empty then merge the data
      d1 <- rbind(d1,f)
    }
  }

  d1$time_series<-gsub("sim","",d1$time_series)
  d1$time_series<-str_pad(d1$time_series, 2, pad = "0")
  colnames(d1)<-c("Method","Row Sum","Network")
  d1$`Row Sum`<-d1$`Row Sum`/50

  return(d1)
}
create_median_lineplot<-function(pattern){
  count_files <- list.files(recursive=T,pattern=paste0("^",pattern),full.names=TRUE)
  d1<-data.frame()
  for (i in count_files) {
    f <- read.csv(i,row.names = 1) %>% filter(Method_name %in% rowsums_top4)
    f<- cbind(f,c(rep(unlist(strsplit(i,'/'))[2],nrow(f))))
    colnames(f)[3]<-c('time_series')

    #if the counts object is empty just copy the f to m
    if(length(d1) == 0){
      d1 <- f

    } else
    {
      #if the dataframe is not empty then merge the data
      d1 <- rbind(d1,f)
    }
  }

  d1$time_series<-gsub("sim","",d1$time_series)
  d1$time_series<-str_pad(d1$time_series, 2, pad = "0")
  colnames(d1)<-c("Method","Median","Network")

  return(d1)
}

We can plot how the top 4 methods perform against the other top 6 methods in all 28 networks individually using line plots under different weights of AUROC and ACC.